{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.utils import shuffle\n",
    "from sklearn.cross_validation import train_test_split\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.metrics import classification_report\n",
    "\n",
    "pathNew = 'D:/homework/pre/'\n",
    "dataSetSampler = pd.read_csv(pathNew+'dataSetSampler.csv')\n",
    "dataSetSampler = shuffle(dataSetSampler)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataSetSampler_y = dataSetSampler['label']\n",
    "dataSetSampler_x = dataSetSampler.drop(['label'],axis=1)\n",
    "x_train, x_test, y_train, y_test = train_test_split(dataSetSampler_x, dataSetSampler_y, test_size = 0.3, random_state = 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "label\n",
      "0     1097\n",
      "1     1091\n",
      "2     1112\n",
      "3     1143\n",
      "4     1084\n",
      "5     1084\n",
      "6     1107\n",
      "7     1078\n",
      "8     1098\n",
      "9     1070\n",
      "10    1081\n",
      "11    1109\n",
      "Name: label, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "#来看一下通过shuffle，train_test_split后训练集是否均匀\n",
    "tempDataFrame = pd.DataFrame(y_train,columns=['label'])\n",
    "tempDataFrame = tempDataFrame['label'].groupby(tempDataFrame['label'])\n",
    "print(tempDataFrame.count())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------train done---------------\n"
     ]
    }
   ],
   "source": [
    "forest = RandomForestClassifier(n_estimators=10000, random_state=0, n_jobs=-1)\n",
    "forest.fit(x_train, y_train)\n",
    "print('------------train done---------------')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 1) WT_Runcode                     0.099817\n",
      " 2) iTurbineOperationMode          0.041994\n",
      " 3) iBPLevel                       0.040734\n",
      " 4) iAvailabillityToday            0.038146\n",
      " 5) iYPLevel                       0.027115\n",
      " 6) iIL1_690V                      0.025338\n",
      " 7) iBlade1TempBattBox_1sec        0.024077\n",
      " 8) iIL3_690V                      0.024002\n",
      " 9) iIL2_690V                      0.023961\n",
      "10) iAvailabillityTotal            0.022775\n",
      "11) iBlade3TempInvBox_1sec         0.021736\n",
      "12) iBlade2TempBattBox_1sec        0.021670\n",
      "13) iTempGenStatorU_1sec           0.021056\n",
      "14) iTempGenStatorV_1sec           0.019180\n",
      "15) iTempGenStatorW_1sec           0.018521\n",
      "16) iPitchAngle2                   0.018511\n",
      "17) iKWhThisDay_h                  0.018486\n",
      "18) iPitchAngle3                   0.018369\n",
      "19) iPitchAngle1                   0.017715\n",
      "20) iBlade3TempBattBox_1sec        0.017575\n",
      "21) iBlade2TempMotor_1sec          0.016416\n",
      "22) iTempGenBearNDE_1sec           0.015374\n",
      "23) iTempCabinetNacelle_1sec       0.014635\n",
      "24) iTempGearBearDE_1sec           0.014356\n",
      "25) iBlade1TempMotor_1sec          0.014143\n",
      "26) iKWhOverall_h                  0.014041\n",
      "27) iOperationHoursOverall_h       0.014015\n",
      "28) iBlade3TempMotor_1sec          0.013904\n",
      "29) iBlade3TempPMCHeatsink_3sec    0.013718\n",
      "30) iBlade1TempPMMHeatsink_1sec    0.013690\n",
      "31) iBlade2TempPMMHeatsink_2sec    0.013578\n",
      "32) iTempGearBearNDE_1sec          0.013260\n",
      "33) iTempHub_1sec                  0.013247\n",
      "34) iTemp1GearOil_1sec             0.013026\n",
      "35) iSetValueGenSpeed              0.012958\n",
      "36) iTempCntr_1sec                 0.012355\n",
      "37) iGenSpeed                      0.011740\n",
      "38) iBlade3TempPMMHeatsink_3sec    0.011710\n",
      "39) iVaneDiiection                 0.011108\n",
      "40) iWindSpeed                     0.010915\n",
      "41) iBlade1TempPMCHeatsink_1sec    0.010611\n",
      "42) iBlade2TempPMCHeatsink_2sec    0.010597\n",
      "43) iBlade1TempInvBox_1sec         0.009965\n",
      "44) iTempTowerBase_1sec            0.009962\n",
      "45) iCableTwistTotal               0.009589\n",
      "46) iNacellePositionTotal          0.009217\n",
      "47) iTempCabinetTowerBase_1sec     0.009129\n",
      "48) iTempOutdoor_1sec              0.009121\n",
      "49) iTempNacelle_1sec              0.009078\n",
      "50) iActivePoweiSetPointValue      0.008807\n",
      "51) iSetValuePitchAngle            0.008624\n",
      "52) iGenPower                      0.007979\n",
      "53) iTempRotorBearB_1sec           0.007145\n",
      "54) iwindDirection                 0.006576\n",
      "55) iTempGenBearDE_1sec            0.006521\n",
      "56) iTempRotorBearA_1sec           0.005773\n",
      "57) iTempTransformer690_400V_1sec  0.005658\n",
      "58) iTempGenCoolingAir_1sec        0.005501\n",
      "59) iBlade2TempInvBox_1sec         0.004966\n",
      "60) iReactivePower                 0.004299\n",
      "61) iFiequency                     0.002834\n",
      "62) iNacellePositionLtd            0.002379\n",
      "63) iOperationHoursDay             0.002180\n",
      "64) iUL2_690V                      0.001625\n",
      "65) iUL3_690V                      0.001321\n",
      "66) iUL1_690V                      0.000898\n",
      "67) iCosPhi                        0.000283\n",
      "68) iVibrationZ                    0.000221\n",
      "69) iVibrationY                    0.000176\n",
      "70) iTempMV_1sec                   0.000002\n",
      "71) iCosPhiSetValue                0.000000\n"
     ]
    }
   ],
   "source": [
    "featureName =dataSetSampler.columns.tolist()\n",
    "featureName.pop()\n",
    "importances = forest.feature_importances_\n",
    "indices = np.argsort(importances)[::-1]\n",
    "for f in range(x_train.shape[1]):\n",
    "    print(\"%2d) %-*s %f\" % (f + 1, 30, featureName[indices[f]], importances[indices[f]]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "随机森林准确度: 0.9994678964171692\n",
      "其他指标：\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "          0       1.00      1.00      1.00       469\n",
      "          1       1.00      1.00      1.00       475\n",
      "          2       1.00      1.00      1.00       454\n",
      "          3       1.00      1.00      1.00       423\n",
      "          4       1.00      1.00      1.00       482\n",
      "          5       1.00      1.00      1.00       482\n",
      "          6       1.00      1.00      1.00       458\n",
      "          7       1.00      1.00      1.00       486\n",
      "          8       1.00      1.00      1.00       468\n",
      "          9       1.00      0.99      1.00       499\n",
      "         10       1.00      1.00      1.00       485\n",
      "         11       1.00      1.00      1.00       457\n",
      "\n",
      "avg / total       1.00      1.00      1.00      5638\n",
      "\n"
     ]
    }
   ],
   "source": [
    "y_predict = forest.predict(x_test)\n",
    "print(\"随机森林准确度:\", forest.score(x_test, y_test))\n",
    "print(\"其他指标：\\n\", classification_report(y_predict, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataSetSampler_x = dataSetSampler[['WT_Runcode','iTurbineOperationMode','iBPLevel','iAvailabillityToday','iYPLevel','iIL1_690V','iBlade1TempBattBox_1sec']]\n",
    "x_train, x_test, y_train, y_test = train_test_split(dataSetSampler_x, dataSetSampler_y, test_size = 0.3, random_state = 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------train done---------------\n"
     ]
    }
   ],
   "source": [
    "forest1 = RandomForestClassifier(n_estimators=10000, random_state=0, n_jobs=-1)\n",
    "forest1.fit(x_train, y_train)\n",
    "print('------------train done---------------')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "随机森林准确度: 0.9231997162114225\n",
      "其他指标：\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "          0       1.00      1.00      1.00       448\n",
      "          1       1.00      1.00      1.00       441\n",
      "          2       1.00      1.00      1.00       470\n",
      "          3       1.00      1.00      1.00       472\n",
      "          4       1.00      1.00      1.00       475\n",
      "          5       1.00      0.52      0.68       893\n",
      "          6       1.00      1.00      1.00       498\n",
      "          7       1.00      1.00      1.00       446\n",
      "          8       1.00      1.00      1.00       448\n",
      "          9       1.00      1.00      1.00       503\n",
      "         10       0.12      1.00      0.22        61\n",
      "         11       1.00      1.00      1.00       483\n",
      "\n",
      "avg / total       0.99      0.92      0.94      5638\n",
      "\n"
     ]
    }
   ],
   "source": [
    "y_predict = forest1.predict(x_test)\n",
    "print(\"随机森林准确度:\", forest1.score(x_test, y_test))\n",
    "print(\"其他指标：\\n\", classification_report(y_predict, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataSetSamplerfilterFeature =dataSetSampler[['WT_Runcode','iTurbineOperationMode','iBPLevel','iAvailabillityToday','iYPLevel','iIL1_690V','iBlade1TempBattBox_1sec','label']]\n",
    "dataSetSamplerfilterFeature.to_csv(pathNew+'dataSetSamplerfilterFeature.csv',index=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataSet = pd.read_csv(pathNew+'dataSet.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "#dataSet数据集过大，这里就取5%拿来验证吧\n",
    "dataSet = shuffle(dataSet)\n",
    "dataSet_y = dataSet['label']\n",
    "dataSet_x = dataSet[['WT_Runcode','iTurbineOperationMode','iBPLevel','iAvailabillityToday','iYPLevel','iIL1_690V','iBlade1TempBattBox_1sec']]\n",
    "x_train, x_test, y_train, y_test = train_test_split(dataSet_x, dataSet_y, test_size = 0.05, random_state = 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "随机森林准确度: 0.9982629529799341\n",
      "其他指标：\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "          0       1.00      1.00      1.00     85849\n",
      "          1       1.00      1.00      1.00       397\n",
      "          2       1.00      1.00      1.00      1679\n",
      "          3       0.00      0.00      0.00         3\n",
      "          4       1.00      1.00      1.00      2494\n",
      "          5       0.00      0.00      0.00         1\n",
      "          6       1.00      1.00      1.00     25448\n",
      "          7       1.00      1.00      1.00       365\n",
      "          8       1.00      1.00      1.00         1\n",
      "          9       1.00      0.85      0.92        93\n",
      "         10       0.00      0.00      0.00         0\n",
      "         11       1.00      0.67      0.80       535\n",
      "\n",
      "avg / total       1.00      1.00      1.00    116865\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\software\\envs\\tensorflow\\lib\\site-packages\\sklearn\\metrics\\classification.py:1135: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "D:\\software\\envs\\tensorflow\\lib\\site-packages\\sklearn\\metrics\\classification.py:1137: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples.\n",
      "  'recall', 'true', average, warn_for)\n"
     ]
    }
   ],
   "source": [
    "dataSet_y_predict = forest1.predict(x_test)\n",
    "print(\"随机森林准确度:\", forest1.score(x_test, y_test))\n",
    "print(\"其他指标：\\n\", classification_report(dataSet_y_predict, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataSet =dataSet[['WT_Runcode','iTurbineOperationMode','iBPLevel','iAvailabillityToday','iYPLevel','iIL1_690V','iBlade1TempBattBox_1sec','label']]\n",
    "dataSet.to_csv(pathNew+'dataSetfilterFeature.csv',index=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
